{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib as mpl\n",
    "from dautil import report\n",
    "from dautil import plotting\n",
    "import numpy as np\n",
    "from tabulate import tabulate\n",
    "import urllib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def aggregate():\n",
    "    df = sns.load_dataset(\"anscombe\")\n",
    "\n",
    "    agg = df.groupby('dataset')\\\n",
    "             .agg([np.mean, np.var])\\\n",
    "             .transpose()\n",
    "    groups = df.groupby('dataset')\n",
    "\n",
    "    corr = [g.corr()['x'][1] for _, g in groups]\n",
    "    builder = report.DFBuilder(agg.columns)\n",
    "    builder.row(corr)\n",
    "\n",
    "    fits = [np.polyfit(g['x'], g['y'], 1) for _, g in groups]\n",
    "    builder.row([f[0] for f in fits])\n",
    "    builder.row([f[1] for f in fits])\n",
    "    bottom = builder.build(['corr', 'slope', 'intercept'])\n",
    "\n",
    "    return df, pd.concat((agg, bottom))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def generate(table):\n",
    "    writer = report.RSTWriter()\n",
    "    writer.h1('Anscombe Statistics')\n",
    "    writer.add(tabulate(table, tablefmt='html', floatfmt='.3f'))\n",
    "    \n",
    "    return writer.rst"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def plot(df):\n",
    "    sns.set(style=\"ticks\")\n",
    "    g = sns.lmplot(x=\"x\", y=\"y\", col=\"dataset\", hue=\"dataset\", data=df,\n",
    "               col_wrap=2, ci=None, palette=\"muted\", size=4,\n",
    "               scatter_kws={\"s\": 50, \"alpha\": 1})\n",
    "\n",
    "    plotting.embellish(g.fig.axes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df, table = aggregate()\n",
    "from IPython.display import display_markdown\n",
    "display_markdown(generate(table), raw=True)\n",
    "%matplotlib inline\n",
    "plot(df)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
